import datetime
import itertools

import numpy as np
import pandas as pd
from sklearn import *
from sklearn.model_selection import KFold
import xgboost as xgb

# Load the dataset, then discard every row that contains a missing value.
dat = pd.read_csv('data.csv')
dat = dat.dropna()
# Relabel the columns positionally (0, 1, 2, ...) so that the code below can
# address them by integer.
dat.columns = range(dat.shape[1])

# Column 0 holds the raw target code, which is remapped to a binary label:
#   2 (quake occurs within 3 weeks)        -> 1
#   1 (quake occurs within 6 weeks)        -> row is dropped
#   0 (quake does not occur at this site)  -> 0
# `.loc` is used for the boolean row filter; the old `.ix` indexer has been
# removed from current pandas releases.
dat = dat.loc[dat[0] != 1, :]
# With the 1-coded rows removed, clipping to [0, 1] maps 2 to 1 and keeps 0.
labels = dat.iloc[:, 0].clip(0.0, 1.0)

# Feature matrix: every column except the target column 0.
train = dat.drop([0], axis=1)

# Booster settings handed to xgb.train further down.
# NOTE(review): recent XGBoost releases appear to use `verbosity` in place of
# `silent` -- confirm against the installed version before changing it.
params = {
    "objective": "binary:logistic",
    "eta": 0.5,
    "min_child_weight": 1,
    "subsample": 0.5,
    "colsample_bytree": 0.8,
    "silent": 1,
    "max_depth": 12,
    "seed": 0,
}
# The same settings as a list of (name, value) pairs.
plst = list(params.items())
# Number of boosting rounds used for each trained model.
num_rounds = 3
    
# Train one booster per cross-validation split and dump each one as text to
# 'model<k>.txt'. Only the training side of every split is used; the held-out
# indices (test_index) are ignored because no evaluation is performed here.
#
# sklearn.model_selection.KFold replaces the removed
# sklearn.cross_validation.KFold(n=..., n_folds=...) API. random_state is not
# passed: it has no effect without shuffling, and current scikit-learn
# rejects it when shuffle=False. The resulting folds are the same contiguous,
# unshuffled partitions as before.
folds = KFold(n_splits=8, shuffle=False)
for k, (train_index, test_index) in enumerate(folds.split(train)):
    X_create = train.iloc[train_index, :]
    y_create = labels.iloc[train_index]

    xgtrain = xgb.DMatrix(X_create, label=y_create)
    model = xgb.train(plst, xgtrain, num_rounds)
    model.dump_model('model{0}.txt'.format(k))